{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/mengfanxu/miniconda3/envs/transmla/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "from llama.modeling_llama import LlamaForCausalLM\n",
    "from transformers import AutoTokenizer\n",
    "import torch\n",
    "from copy import deepcopy\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of LlamaForCausalLM were not initialized from the model checkpoint at /data2/mengfanxu/huggingface/Llama-3.2-1B-Instruct and are newly initialized: ['model.layers.0.self_attn.k_up_proj.weight', 'model.layers.0.self_attn.v_up_proj.weight', 'model.layers.1.self_attn.k_up_proj.weight', 'model.layers.1.self_attn.v_up_proj.weight', 'model.layers.10.self_attn.k_up_proj.weight', 'model.layers.10.self_attn.v_up_proj.weight', 'model.layers.11.self_attn.k_up_proj.weight', 'model.layers.11.self_attn.v_up_proj.weight', 'model.layers.12.self_attn.k_up_proj.weight', 'model.layers.12.self_attn.v_up_proj.weight', 'model.layers.13.self_attn.k_up_proj.weight', 'model.layers.13.self_attn.v_up_proj.weight', 'model.layers.14.self_attn.k_up_proj.weight', 'model.layers.14.self_attn.v_up_proj.weight', 'model.layers.15.self_attn.k_up_proj.weight', 'model.layers.15.self_attn.v_up_proj.weight', 'model.layers.2.self_attn.k_up_proj.weight', 'model.layers.2.self_attn.v_up_proj.weight', 'model.layers.3.self_attn.k_up_proj.weight', 'model.layers.3.self_attn.v_up_proj.weight', 'model.layers.4.self_attn.k_up_proj.weight', 'model.layers.4.self_attn.v_up_proj.weight', 'model.layers.5.self_attn.k_up_proj.weight', 'model.layers.5.self_attn.v_up_proj.weight', 'model.layers.6.self_attn.k_up_proj.weight', 'model.layers.6.self_attn.v_up_proj.weight', 'model.layers.7.self_attn.k_up_proj.weight', 'model.layers.7.self_attn.v_up_proj.weight', 'model.layers.8.self_attn.k_up_proj.weight', 'model.layers.8.self_attn.v_up_proj.weight', 'model.layers.9.self_attn.k_up_proj.weight', 'model.layers.9.self_attn.v_up_proj.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "LlamaForCausalLM(\n",
       "  (model): LlamaModel(\n",
       "    (embed_tokens): Embedding(128256, 2048)\n",
       "    (layers): ModuleList(\n",
       "      (0-15): 16 x LlamaDecoderLayer(\n",
       "        (self_attn): LlamaMLAttention(\n",
       "          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)\n",
       "          (k_proj): Linear(in_features=2048, out_features=512, bias=False)\n",
       "          (k_up_proj): Linear(in_features=512, out_features=2048, bias=False)\n",
       "          (v_proj): Linear(in_features=2048, out_features=512, bias=False)\n",
       "          (v_up_proj): Linear(in_features=512, out_features=2048, bias=False)\n",
       "          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)\n",
       "          (rotary_emb): LlamaRotaryEmbedding()\n",
       "        )\n",
       "        (mlp): LlamaMLP(\n",
       "          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)\n",
       "          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)\n",
       "          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)\n",
       "          (act_fn): SiLU()\n",
       "        )\n",
       "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
       "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
       "      )\n",
       "    )\n",
       "    (norm): LlamaRMSNorm((2048,), eps=1e-05)\n",
       "    (rotary_emb): LlamaRotaryEmbedding()\n",
       "  )\n",
       "  (lm_head): Linear(in_features=2048, out_features=128256, bias=False)\n",
       ")"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = LlamaForCausalLM.from_pretrained(\"/data2/mengfanxu/huggingface/Llama-3.2-1B-Instruct\", attn_implementation=\"eager\", device_map='cuda:0')\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"/data2/mengfanxu/huggingface/Llama-3.2-1B-Instruct\")\n",
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "hidden_size = model.config.hidden_size\n",
    "n_heads = model.config.num_attention_heads\n",
    "kv_heads = model.config.num_key_value_heads\n",
    "head_dim = model.config.head_dim\n",
    "kv_groups = model.config.num_attention_heads // model.config.num_key_value_heads"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Insert identity matrices\n",
    "for name,module in model.named_modules():\n",
    "    if 'k_up_proj' in name or \"v_up_proj\" in name:\n",
    "        module.weight.data = torch.stack([torch.eye(kv_heads*head_dim).reshape(kv_heads, head_dim, kv_heads*head_dim)]*kv_groups,dim=1).reshape(hidden_size, kv_heads*head_dim).contiguous().to(module.weight.data.device,module.weight.data.dtype)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
      "/home/mengfanxu/miniconda3/envs/transmla/lib/python3.10/site-packages/transformers/generation/utils.py:1375: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['<|begin_of_text|>Give me a short introduction to large language model. A large language model is a type of artificial']"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "output = model.generate(**tokenizer(\"Give me a short introduction to large language model.\",return_tensors=\"pt\").to(\"cuda:0\"))\n",
    "tokenizer.batch_decode(output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "263it [00:42,  6.26it/s]\n"
     ]
    }
   ],
   "source": [
    "for name,module in tqdm(model.named_modules()):\n",
    "    if name.endswith(\"self_attn\"):\n",
    "        # Orthogonal k_proj and k_up_proj\n",
    "        k_up_weight = deepcopy(module.k_up_proj.weight.data).reshape(hidden_size, kv_heads, head_dim) # (hidden_size, kv_heads, head_dim)\n",
    "        k_weight = deepcopy(module.k_proj.weight.data).reshape(kv_heads, head_dim, hidden_size) # (kv_heads, head_dim, hidden_size)\n",
    "        if module.k_proj.bias is not None:\n",
    "            k_weight = torch.cat([k_weight,deepcopy(module.k_proj.bias.data).reshape(kv_heads, head_dim,1)],dim=-1)\n",
    "        k_up_k = torch.einsum(\"Dhd,hdL->hDL\",k_up_weight, k_weight) # (kv_heads, hidden_size, hidden_size), rank<=head_dim\n",
    "        U,S,V = torch.svd_lowrank(k_up_k, head_dim, niter=head_dim) # U(kv_heads, hidden_size, head_dim), S(kv_heads, head_dim), V(kv_heads, hidden_size, head_dim)\n",
    "        US_sqrt = torch.einsum('hDd,hd->Dhd',U,torch.sqrt(S)) # (latent_dim, kv_heads, head_dim)\n",
    "        S_sqrtV = torch.einsum('hd,hLd->hdL',torch.sqrt(S),V) # (kv_heads, head_dim, latent_dim)\n",
    "        if module.k_proj.bias is not None:\n",
    "            module.k_proj.bias.data = S_sqrtV[:,:,-1].reshape(-1).contiguous()\n",
    "            S_sqrtV = S_sqrtV[:,:,:-1]\n",
    "        module.k_up_proj.weight.data = US_sqrt.reshape(hidden_size, kv_heads*head_dim).contiguous()\n",
    "        module.k_proj.weight.data = S_sqrtV.reshape(kv_heads*head_dim, hidden_size).contiguous()\n",
    "        # Orthogonal v_proj and v_up_proj\n",
    "        v_up_weight = deepcopy(module.v_up_proj.weight.data).reshape(hidden_size, kv_heads, head_dim) # (hidden_size, kv_heads, head_dim)\n",
    "        v_weight = deepcopy(module.v_proj.weight.data).reshape(kv_heads, head_dim, hidden_size) # (kv_heads, head_dim, hidden_size)\n",
    "        if module.v_proj.bias is not None:\n",
    "            v_weight = torch.cat([v_weight,deepcopy(module.v_proj.bias.data).reshape(kv_heads, head_dim,1)],dim=-1)\n",
    "        v_up_v = torch.einsum(\"Dhd,hdL->hDL\",v_up_weight, v_weight) # (kv_heads, hidden_size, hidden_size), rank<=head_dim\n",
    "        U,S,V = torch.svd_lowrank(v_up_v, head_dim, niter=head_dim) # U(kv_heads, hidden_size, head_dim), S(kv_heads, head_dim), V(kv_heads, hidden_size, head_dim)\n",
    "        US_sqrt = torch.einsum('hDd,hd->Dhd',U,torch.sqrt(S)) # (latent_dim, kv_heads, head_dim)\n",
    "        S_sqrtV = torch.einsum('hd,hLd->hdL',torch.sqrt(S),V) # (kv_heads, head_dim, latent_dim)\n",
    "        if module.v_proj.bias is not None:\n",
    "            module.v_proj.bias.data = S_sqrtV[:,:,-1].reshape(-1).contiguous()\n",
    "            S_sqrtV = S_sqrtV[:,:,:-1]\n",
    "        module.v_up_proj.weight.data = US_sqrt.reshape(hidden_size, kv_heads*head_dim).contiguous()\n",
    "        module.v_proj.weight.data = S_sqrtV.reshape(kv_heads*head_dim, hidden_size).contiguous()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['<|begin_of_text|>Give me a short introduction to large language model. Explain how they work and what their strengths and']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "output = model.generate(**tokenizer(\"Give me a short introduction to large language model.\",return_tensors=\"pt\").to(\"cuda:0\"))\n",
    "tokenizer.batch_decode(output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.save_pretrained(\"llama3.2_1b_instruct_transMLA\")\n",
    "#model.push_to_hub(\"fxmeng/llama3.2_1b_instruct_transMLA\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('llama3.2_1b_instruct_transMLA/tokenizer_config.json',\n",
       " 'llama3.2_1b_instruct_transMLA/special_tokens_map.json',\n",
       " 'llama3.2_1b_instruct_transMLA/tokenizer.json')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer.save_pretrained(\"llama3.2_1b_instruct_transMLA\")\n",
    "#tokenizer.push_to_hub(\"fxmeng/llama3.2_1b_instruct_transMLA\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "99014ac9a2e540b9ab50b92dc2a5ba48",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
      "/home/mfx/miniconda3/envs/bitsfit/lib/python3.10/site-packages/transformers/generation/utils.py:1375: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['<|begin_of_text|>1+1=2\\nI love my life.\\nThe phrase \"1+1=2']"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = LlamaForCausalLM.from_pretrained(\"fxmeng/llama3.2_1b_instruct_transMLA\", attn_implementation=\"eager\", device_map='cuda:0')\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"fxmeng/llama3.2_1b_instruct_transMLA\")\n",
    "output = model.generate(**tokenizer(\"Give me a short introduction to large language model.\",return_tensors=\"pt\").to(\"cuda:0\"))\n",
    "tokenizer.batch_decode(output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.colors import ListedColormap\n",
    "cmap = ListedColormap([\"white\", \"blue\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.image.AxesImage at 0x7f58b3fa7400>"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAa4AAAGiCAYAAAC/NyLhAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAhSklEQVR4nO3dbXBU1eHH8V9CkuVxNwbILqlEcXzAKGAFDVt1nEpKtBmrJS8sw2BqGR1pYMRYWtMqqO00DM7UaqvYGVvxhUpNp2hFUGOQUHV5ilDDgyk61KTKJiqT3UBlA8n5v+CfqysB2Tzs7tn9fmbuDLn37O65p+l+3ezdJMMYYwQAgCUyEz0BAABiQbgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFZJWLgef/xxnXvuuRo+fLiKi4u1bdu2RE0FAGCRhITrr3/9q6qqqrR8+XK9++67mjZtmkpLS9Xe3p6I6QAALJKRiF+yW1xcrCuuuEJ//OMfJUk9PT2aOHGiFi9erHvvvTfe0wEAWCQr3g/Y1dWlxsZGVVdXO/syMzNVUlKiQCDQ520ikYgikYjzdU9Pjw4dOqSxY8cqIyNjyOcMABhcxhh1dnaqoKBAmZmx/fAv7uH67LPP1N3dLa/XG7Xf6/Xq/fff7/M2NTU1evDBB+MxPQBAHLW2turss8+O6TZxD1d/VFdXq6qqyvk6FAqpsLBQra2tmjjRrVAogZMDAMQsHA5r4sSJGjNmTMy3jXu4xo0bp2HDhqmtrS1qf1tbm3w+X5+3cblccrlcJ+13u90yxq2MDIk/hwkA9unP2z1xv6owJydH06dPV319vbOvp6dH9fX18vv9/bpPYyTe6gKA9JCQHxVWVVWpoqJCM2bM0JVXXqnf//73OnLkiG677bZ+32dvvHjlBQCpLSHhuuWWW/Tpp59q2bJlCgaDuuyyy/Tqq6+edMFGrIgXAKS+hHyOa6DC4bA8Ho9CoZDcbvdJx4kXACS3b3oeP52U/F2FvOcFAKkrJcMlES8ASFUpGy6JeAFAKkrpcEm81wUAqSblwwUASC2ECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIVZxkZiZ4BANiNcMWZMcQLAAaCcCUA8QKA/iNcCUK8AKB/CFcCES8AiB3hSjDiBQCxIVxJgHgBwJkjXEmCeAHAmSFcSYR4AcA3I1xJhngBwOkRriREvADg1AhXkiJeANA3wpXEiBcAnIxwJTniBQDRCJcFiBcAfIlwWYJ4AcAJhMsixAsACJd1iBeAdEe4LES8AKQzwmUp4gUgXREuixEvAOmIcFmOeAFIN4QrBRAvAOkk5nBt3rxZN954owoKCpSRkaEXX3wx6rgxRsuWLdOECRM0YsQIlZSUaP/+/VFjDh06pHnz5sntdis3N1cLFizQ4cOHB3Qi6Y54AUgXMYfryJEjmjZtmh5//PE+j69cuVKPPfaYnnzySW3dulWjRo1SaWmpjh496oyZN2+e9uzZo7q6Oq1bt06bN2/WHXfc0f+zgCTiBSBNmAGQZNauXet83dPTY3w+n3n44YedfR0dHcblcpnnn3/eGGPM3r17jSSzfft2Z8yGDRtMRkaG+fjjj8/ocUOhkJFkQqHQQKafsgb2vyoADL2BPI8P6ntcBw4cUDAYVElJibPP4/GouLhYgUBAkhQIBJSbm6sZM2Y4Y0pKSpSZmamtW7f2eb+RSEThcDhqw6nxygtAKhvUcAWDQUmS1+uN2u/1ep1jwWBQ+fn5UcezsrKUl5fnjPm6mpoa
eTweZ5s4ceJgTjslES8AqcqKqwqrq6sVCoWcrbW1NdFTsgLxApCKBjVcPp9PktTW1ha1v62tzTnm8/nU3t4edfz48eM6dOiQM+brXC6X3G531IYzQ7wApJpBDdekSZPk8/lUX1/v7AuHw9q6dav8fr8kye/3q6OjQ42Njc6YjRs3qqenR8XFxYM5Hfw/4gUglWTFeoPDhw/rgw8+cL4+cOCAdu3apby8PBUWFmrJkiX6zW9+owsuuECTJk3S/fffr4KCAt18882SpIsvvljXX3+9br/9dj355JM6duyYFi1apB/96EcqKCgYtBNDtN54GZPomQDAwMQcrh07dui73/2u83VVVZUkqaKiQqtXr9bPf/5zHTlyRHfccYc6Ojp09dVX69VXX9Xw4cOd2zz77LNatGiRZs2apczMTJWXl+uxxx4bhNPB6RAvAKkgwxj7nsbC4bA8Ho9CoRDvd/UD8QKQaAN5HrfiqkIMLt7zAmAzwpWmiBcAWxGuNEa8ANiIcKU54gXANoQLxAuAVQgXJBEvAPYgXHAQLwA2IFyIQrwAJDvChZMQLwDJjHChT8QLQLIiXDgl4gUgGREunBbxApBsCBe+EfECkEwIF84I8QKQLAgXzhjxApAMCBdiQrwAJBrhQsz4I5QAEolwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXEh5GRmJngGAwUS4kPKMIV5AKokpXDU1Nbriiis0ZswY5efn6+abb1Zzc3PUmKNHj6qyslJjx47V6NGjVV5erra2tqgxLS0tKisr08iRI5Wfn6+lS5fq+PHjAz8b4BSIF5A6YgpXQ0ODKisrtWXLFtXV1enYsWOaPXu2jhw54oy5++679fLLL6u2tlYNDQ365JNPNGfOHOd4d3e3ysrK1NXVpXfeeUfPPPOMVq9erWXLlg3eWQF9IF5AijAD0N7ebiSZhoYGY4wxHR0dJjs729TW1jpj9u3bZySZQCBgjDFm/fr1JjMz0wSDQWfMqlWrjNvtNpFI5IweNxQKGUkmFAoNZPpIUwP7rgcwGAbyPD6g97hCoZAkKS8vT5LU2NioY8eOqaSkxBkzefJkFRYWKhAISJICgYCmTJkir9frjCktLVU4HNaePXv6fJxIJKJwOBy1Af3FKy/Abv0OV09Pj5YsWaKrrrpKl156qSQpGAwqJydHubm5UWO9Xq+CwaAz5qvR6j3ee6wvNTU18ng8zjZx4sT+ThuQRLwAm/U7XJWVldq9e7fWrFkzmPPpU3V1tUKhkLO1trYO+WMi9REvwE5Z/bnRokWLtG7dOm3evFlnn322s9/n86mrq0sdHR1Rr7ra2trk8/mcMdu2bYu6v96rDnvHfJ3L5ZLL5erPVIHT6o2XMYmeCYAzFdMrLmOMFi1apLVr12rjxo2aNGlS1PHp06crOztb9fX1zr7m5ma1tLTI7/dLkvx+v5qamtTe3u6Mqaurk9vtVlFR0UDOBegXXnkBdonpFVdlZaWee+45vfTSSxozZozznpTH49GIESPk8Xi0YMECVVVVKS8vT263W4sXL5bf79fMmTMlSbNnz1ZRUZHmz5+vlStXKhgM6r777lNlZSWvqpAwvPIC7JFhzJn/XzXjFP9Z+vTTT+vHP/6xpBMfQL7nnnv0/PPPKxKJqLS0VE888UTUjwE/+ugjLVy4UJs2bdKoUaNUUVGhFStWKCvrzDoaDofl8XgUCoXkdrvPdPrANyJeQHwM5Hk8pnAlC8KFoUS8gKE3kOdxflch
8DW85wUkN8IF9IF4AcmLcAGnQLyA5ES4gNMgXkDyIVzANyBeQHIhXMAZIF5A8iBcwBkiXkByIFxADIgXkHiEC4gR8QISi3AB/UC8gMQhXEA/ES8gMQgXMADEC4g/wgUMEPEC4otwAYOAeAHxQ7iAQUK8gPggXMAgIl7A0CNcwCAjXsDQIlzAECBewNAhXMAQIV7A0CBcwBAiXsDgI1zAECNewOAiXEAcEC9g8BAuIE6IFzA4CBcQR8QLGDjCBcQZ8QIGhnABCUC8gP4jXECCEC+gfwgXkEDGJHoGgH0IFwDAKoQLAGAVwgUAsArhAgBYhXABAKxCuIAkxaXyQN8IF5Ck+JwX0DfCBSQx4gWcjHABSY54AdEIF2AB4gV8iXABliBewAmEC7AI8QIIF2Ad4oV0R7gACxEvpDPCBViKeCFdES7AYsQL6YhwAZYjXkg3hAtIAcQL6YRwASmCeCFdEC4ghRAvpAPCBaQY4oVUR7iAFES8kMoIF5CijEn0DIChQbgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVYgrXqlWrNHXqVLndbrndbvn9fm3YsME5fvToUVVWVmrs2LEaPXq0ysvL1dbWFnUfLS0tKisr08iRI5Wfn6+lS5fq+PHjg3M2AICUF1O4zj77bK1YsUKNjY3asWOHrrvuOt10003as2ePJOnuu+/Wyy+/rNraWjU0NOiTTz7RnDlznNt3d3errKxMXV1deuedd/TMM89o9erVWrZs2eCeFQAgdZkBOuuss8xTTz1lOjo6THZ2tqmtrXWO7du3z0gygUDAGGPM+vXrTWZmpgkGg86YVatWGbfbbSKRyBk/ZigUMpJMKBQa6PQBAAkwkOfxfr/H1d3drTVr1ujIkSPy+/1qbGzUsWPHVFJS4oyZPHmyCgsLFQgEJEmBQEBTpkyR1+t1xpSWliocDjuv2voSiUQUDoejNgBAeoo5XE1NTRo9erRcLpfuvPNOrV27VkVFRQoGg8rJyVFubm7UeK/Xq2AwKEkKBoNR0eo93nvsVGpqauTxeJxt4sSJsU4bAJAiYg7XRRddpF27dmnr1q1auHChKioqtHfv3qGYm6O6ulqhUMjZWltbh/TxAADJKyvWG+Tk5Oj888+XJE2fPl3bt2/Xo48+qltuuUVdXV3q6OiIetXV1tYmn88nSfL5fNq2bVvU/fVeddg7pi8ul0sulyvWqQIAUtCAP8fV09OjSCSi6dOnKzs7W/X19c6x5uZmtbS0yO/3S5L8fr+amprU3t7ujKmrq5Pb7VZRUdFApwIASAMxveKqrq7WDTfcoMLCQnV2duq5557Tpk2b9Nprr8nj8WjBggWqqqpSXl6e3G63Fi9eLL/fr5kzZ0qSZs+eraKiIs2fP18rV65UMBjUfffdp8rKSl5RAQDOSEzham9v16233qqDBw/K4/Fo6tSpeu211/S9731PkvTII48oMzNT5eXlikQiKi0t1RNPPOHcftiwYVq3bp0WLlwov9+vUaNGqaKiQg899NDgnhUAIGVlGGNMoicRq3A4LI/Ho1AoJLfbnejpAABiNJDncX5XIQDAKoQLAGAVwgUAsArhAgBYhXABAKxCuAAAViFcAAZVRkaiZ4BUR7gADCr7PhkK2xAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIASRkZiZ4BzhThAgBJxhAvWxAuAPh/xMsOhAsA
voJ4JT/CBQBfQ7ySG+ECgD4Qr+RFuADgFIhXciJcAHAaxCv5EC4A+AbEK7kMKFwrVqxQRkaGlixZ4uw7evSoKisrNXbsWI0ePVrl5eVqa2uLul1LS4vKyso0cuRI5efna+nSpTp+/PhApgIAQ4p4JY9+h2v79u3605/+pKlTp0btv/vuu/Xyyy+rtrZWDQ0N+uSTTzRnzhzneHd3t8rKytTV1aV33nlHzzzzjFavXq1ly5b1/ywAIA6IV5Iw/dDZ2WkuuOACU1dXZ6699lpz1113GWOM6ejoMNnZ2aa2ttYZu2/fPiPJBAIBY4wx69evN5mZmSYYDDpjVq1aZdxut4lEImf0+KFQyEgyoVCoP9MHgAHp3zMnvmogz+P9esVVWVmpsrIylZSURO1vbGzUsWPHovZPnjxZhYWFCgQCkqRAIKApU6bI6/U6Y0pLSxUOh7Vnz54+Hy8SiSgcDkdtAJAovPJKrKxYb7BmzRq9++672r59+0nHgsGgcnJylJubG7Xf6/UqGAw6Y74ard7jvcf6UlNTowcffDDWqQLAkOmNlzGJnkn6iekVV2trq+666y49++yzGj58+FDN6STV1dUKhULO1traGrfHBoBT4ZVXYsQUrsbGRrW3t+vyyy9XVlaWsrKy1NDQoMcee0xZWVnyer3q6upSR0dH1O3a2trk8/kkST6f76SrDHu/7h3zdS6XS263O2oDgGRAvOIvpnDNmjVLTU1N2rVrl7PNmDFD8+bNc/6dnZ2t+vp65zbNzc1qaWmR3++XJPn9fjU1Nam9vd0ZU1dXJ7fbraKiokE6LQCIH+IVXzG9xzVmzBhdeumlUftGjRqlsWPHOvsXLFigqqoq5eXlye12a/HixfL7/Zo5c6Ykafbs2SoqKtL8+fO1cuVKBYNB3XfffaqsrJTL5Rqk0wKA+OI9r/iJ+eKMb/LII48oMzNT5eXlikQiKi0t1RNPPOEcHzZsmNatW6eFCxfK7/dr1KhRqqio0EMPPTTYUwGAuCJe8ZFhjH1LHA6H5fF4FAqFeL8LQNIhXt9sIM/j/K5CABhkvOc1tAgXAAwB4jV0CBcADBHiNTQIFwAMIeI1+AgXAAwx4jW4CBcAxAHxGjyECwDihHgNDsIFAHFEvAaOcAFAnBGvgSFcAJAAxKv/CBcAJAjx6h/CBQAJRLxiR7gAIMGIV2wIFwAkAeJ15ggXACQJ4nVmCBcAJBHi9c0IFwAkGeJ1eoQLAJIQ8To1wgUASYp49Y1wAUASI14nI1wAkOSIVzTCBQAWIF5fIlwAYAnidQLhAgCLEC/CBQDWSfd4ES4AsJAxiZ5B4hAuAIBVCBcAWC7dfmxIuADAcun2nhfhAoAUkE7xIlwAkCLSJV6ECwBSSDrEi3ABQIpJ9XgRLgBIQakcL8IFACkqVeNFuAAghaVivAgXAKS4VIsX4QKANJBK8SJcAJAmUiVehAsA0kgqxItwAUCasT1ehAsA0pDN8SJcAJCmbI0X4QKANGbjX1ImXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAq8QUrgceeEAZGRlR2+TJk53jR48eVWVlpcaOHavRo0ervLxcbW1tUffR0tKisrIyjRw5Uvn5+Vq6dKmOHz8+OGcDAEh5WbHe4JJLLtEbb7zx5R1kfXkXd999t1555RXV1tbK4/Fo0aJFmjNnjt5++21JUnd3t8rKyuTz+fTOO+/o4MGDuvXWW5Wdna3f/va3g3A6AIBUF3O4srKy5PP5TtofCoX05z//Wc8995yuu+46SdLTTz+tiy++WFu2bNHMmTP1+uuva+/evXrjjTfk9Xp12WWX6de//rV+8Ytf6IEHHlBOTs7AzwgAkNJifo9r//79Kigo0Hnnnad58+appaVFktTY2Khjx46ppKTEGTt58mQVFhYqEAhIkgKBgKZM
mSKv1+uMKS0tVTgc1p49e075mJFIROFwOGoDAKSnmMJVXFys1atX69VXX9WqVat04MABXXPNNers7FQwGFROTo5yc3OjbuP1ehUMBiVJwWAwKlq9x3uPnUpNTY08Ho+zTZw4MZZpAwBSSEw/Krzhhhucf0+dOlXFxcU655xz9MILL2jEiBGDPrle1dXVqqqqcr4Oh8PECwDS1IAuh8/NzdWFF16oDz74QD6fT11dXero6Iga09bW5rwn5vP5TrrKsPfrvt436+VyueR2u6M2AEB6GlC4Dh8+rA8//FATJkzQ9OnTlZ2drfr6eud4c3OzWlpa5Pf7JUl+v19NTU1qb293xtTV1cntdquoqGggUwEApImYflT4s5/9TDfeeKPOOeccffLJJ1q+fLmGDRumuXPnyuPxaMGCBaqqqlJeXp7cbrcWL14sv9+vmTNnSpJmz56toqIizZ8/XytXrlQwGNR9992nyspKuVyuITlBAEBqiSlc//3vfzV37lx9/vnnGj9+vK6++mpt2bJF48ePlyQ98sgjyszMVHl5uSKRiEpLS/XEE084tx82bJjWrVunhQsXyu/3a9SoUaqoqNBDDz00uGcFAEhZGcYYk+hJxCocDsvj8SgUCvF+FwBYaCDP4zF/ADkZ9LaWz3MBgJ16n7/789rJynB9/vnnksQl8QBguc7OTnk8nphuY2W48vLyJJ34hb2xnnC66P2sW2trKz9O7QPrc3qsz+mxPqd3JutjjFFnZ6cKCgpivn8rw5WZeeIqfo/HwzfNN+Bzb6fH+pwe63N6rM/pfdP69PeFB3+PCwBgFcIFALCKleFyuVxavnw5H1o+Ddbo9Fif02N9To/1Ob2hXh8rP8cFAEhfVr7iAgCkL8IFALAK4QIAWIVwAQCsYmW4Hn/8cZ177rkaPny4iouLtW3btkRPKS42b96sG2+8UQUFBcrIyNCLL74YddwYo2XLlmnChAkaMWKESkpKtH///qgxhw4d0rx58+R2u5Wbm6sFCxbo8OHDcTyLoVNTU6MrrrhCY8aMUX5+vm6++WY1NzdHjTl69KgqKys1duxYjR49WuXl5Sf9cdOWlhaVlZVp5MiRys/P19KlS3X8+PF4nsqQWLVqlaZOnep8KNTv92vDhg3O8XRem76sWLFCGRkZWrJkibMvndfogQceUEZGRtQ2efJk53hc18ZYZs2aNSYnJ8f85S9/MXv27DG33367yc3NNW1tbYme2pBbv369+dWvfmX+/ve/G0lm7dq1UcdXrFhhPB6PefHFF82//vUv84Mf/MBMmjTJfPHFF86Y66+/3kybNs1s2bLF/POf/zTnn3++mTt3bpzPZGiUlpaap59+2uzevdvs2rXLfP/73zeFhYXm8OHDzpg777zTTJw40dTX15sdO3aYmTNnmu985zvO8ePHj5tLL73UlJSUmJ07d5r169ebcePGmerq6kSc0qD6xz/+YV555RXz73//2zQ3N5tf/vKXJjs72+zevdsYk95r83Xbtm0z5557rpk6daq56667nP3pvEbLly83l1xyiTl48KCzffrpp87xeK6NdeG68sorTWVlpfN1d3e3KSgoMDU1NQmcVfx9PVw9PT3G5/OZhx9+2NnX0dFhXC6Xef75540xxuzdu9dIMtu3b3fGbNiwwWRkZJiPP/44bnOPl/b2diPJNDQ0GGNOrEd2drapra11xuzbt89IMoFAwBhz4j8OMjMzTTAYdMasWrXKuN1uE4lE4nsCcXDWWWeZp556irX5is7OTnPBBReYuro6c+211zrhSvc1Wr58uZk2bVqfx+K9Nlb9qLCrq0uNjY0qKSlx9mVmZqqkpESBQCCBM0u8AwcOKBgMRq2Nx+NRcXGxszaBQEC5ubmaMWOGM6akpESZmZnaunVr3Oc81EKhkKQvfylzY2Ojjh07FrVGkydPVmFhYdQaTZkyRV6v1xlTWlqqcDisPXv2xHH2Q6u7u1tr1qzRkSNH5Pf7WZuvqKys
VFlZWdRaSHz/SNL+/ftVUFCg8847T/PmzVNLS4uk+K+NVb9k97PPPlN3d3fUiUuS1+vV+++/n6BZJYdgMChJfa5N77FgMKj8/Pyo41lZWcrLy3PGpIqenh4tWbJEV111lS699FJJJ84/JydHubm5UWO/vkZ9rWHvMds1NTXJ7/fr6NGjGj16tNauXauioiLt2rUr7ddGktasWaN3331X27dvP+lYun//FBcXa/Xq1brooot08OBBPfjgg7rmmmu0e/fuuK+NVeECzlRlZaV2796tt956K9FTSSoXXXSRdu3apVAopL/97W+qqKhQQ0NDoqeVFFpbW3XXXXeprq5Ow4cPT/R0ks4NN9zg/Hvq1KkqLi7WOeecoxdeeEEjRoyI61ys+lHhuHHjNGzYsJOuVGlra5PP50vQrJJD7/mfbm18Pp/a29ujjh8/flyHDh1KqfVbtGiR1q1bpzfffFNnn322s9/n86mrq0sdHR1R47++Rn2tYe8x2+Xk5Oj888/X9OnTVVNTo2nTpunRRx9lbXTix13t7e26/PLLlZWVpaysLDU0NOixxx5TVlaWvF5v2q/RV+Xm5urCCy/UBx98EPfvH6vClZOTo+nTp6u+vt7Z19PTo/r6evn9/gTOLPEmTZokn88XtTbhcFhbt2511sbv96ujo0ONjY3OmI0bN6qnp0fFxcVxn/NgM8Zo0aJFWrt2rTZu3KhJkyZFHZ8+fbqys7Oj1qi5uVktLS1Ra9TU1BQV+Lq6OrndbhUVFcXnROKop6dHkUiEtZE0a9YsNTU1adeuXc42Y8YMzZs3z/l3uq/RVx0+fFgffvihJkyYEP/vn5gvLUmwNWvWGJfLZVavXm327t1r7rjjDpObmxt1pUqq6uzsNDt37jQ7d+40kszvfvc7s3PnTvPRRx8ZY05cDp+bm2teeukl895775mbbrqpz8vhv/3tb5utW7eat956y1xwwQUpczn8woULjcfjMZs2bYq6ZPd///ufM+bOO+80hYWFZuPGjWbHjh3G7/cbv9/vHO+9ZHf27Nlm165d5tVXXzXjx49PicuZ7733XtPQ0GAOHDhg3nvvPXPvvfeajIwM8/rrrxtj0nttTuWrVxUak95rdM8995hNmzaZAwcOmLffftuUlJSYcePGmfb2dmNMfNfGunAZY8wf/vAHU1hYaHJycsyVV15ptmzZkugpxcWbb75pJJ20VVRUGGNOXBJ///33G6/Xa1wul5k1a5Zpbm6Ouo/PP//czJ0714wePdq43W5z2223mc7OzgSczeDra20kmaefftoZ88UXX5if/vSn5qyzzjIjR440P/zhD83Bgwej7uc///mPueGGG8yIESPMuHHjzD333GOOHTsW57MZfD/5yU/MOeecY3Jycsz48ePNrFmznGgZk95rcypfD1c6r9Ett9xiJkyYYHJycsy3vvUtc8stt5gPPvjAOR7PteHPmgAArGLVe1wAABAuAIBVCBcAwCqECwBgFcIFALAK4QIAWIVwAQCsQrgAAFYhXAAAqxAuAIBVCBcAwCqECwBglf8DEukxAMdmP7YAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "v_weight = model.model.layers[0].self_attn.v_proj.weight.data.to(\"cpu\")\n",
    "plt.imshow(v_weight@v_weight.T, cmap=cmap, interpolation='none')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.image.AxesImage at 0x7f581f741510>"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAa4AAAGiCAYAAAC/NyLhAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAirUlEQVR4nO3da3BU5eHH8V9CkuW6GwNkl1SCOKIYBaygYauO85eUSDNWS15Yh9HUMjrSwICxtKZVUNtpGJyp1VaxM7bgC5VKp2hFvMSgsepyMUINF1NwqEmVTVQmu4HKBpLn/4Lm6EII2ez17H4/MztDznl29zlP0/16dk+SLGOMEQAANpGd7AkAABAJwgUAsBXCBQCwFcIFALAVwgUAsBXCBQCwFcIFALAVwgUAsBXCBQCwFcIFALCVpIXr8ccf13nnnafhw4ertLRU27dvT9ZUAAA2kpRw/eUvf1FNTY1WrlypDz74QDNmzFB5ebk6OjqSMR0AgI1kJeOX7JaWluqKK67QH/7wB0lSb2+vJk6cqCVLlujee+9N9HQAADaSk+gn7O7uVlNTk2pra61t2dnZKisrk8/n6/c+oVBIoVDI+rq3t1eHDx/W2LFjlZWVFfc5AwBiyxijrq4uFRUVKTs7sjf/Eh6uL774Qj09PXK73WHb3W63Pvroo37vU1dXpwcffDAR0wMAJFBbW5vOPffciO6T8HANRW1trWpqaqyvA4GAiouL1dbWpokTnQoEkjg5AEDEgsGgJk6cqDFjxkR834SHa9y4cRo2bJja29vDtre3t8vj8fR7H4fDIYfDcdp2p9MpY5zKypL4c5gAYD9D+bgn4VcV5uXlaebMmWpoaLC29fb2qqGhQV6vd0iPaYzER10AkBmS8lZhTU2NqqqqNGvWLF155ZX63e9+p6NHj+r2228f8mP2xYszLwBIb0kJ180336zPP/9cK1askN/v12WXXaZXX331tAs2IkW8ACD9JeXnuKIVDAblcrkUCATkdDpP20+8ACC1ne11fCBp+bsK+cwLANJXWoZLIl4AkK7SNlwS8QKAdJTW4ZKIFwCkm7QPl0S8ACCdZES4JOIFAOkiY8IlES8ASAcZFS6JeAGA3WVcuCTiBQB2lpHhkogXANhVxoZLIl4AYEcZHS6JeAGA3WR8uCTiBQB2Qrj+h3gBgD0Qrm8gXgCQ+gjXKYgXAKQ2wtUP4gUAqYtwnQHxAoDURLgGQLwAIPUQrrMgXgCQWgjXIBAvAEgdhGuQiBcApAbCFQHiBQDJR7giRLwAILkI1xAQLwBIHsI1RMQLAJKDcEWBeAFA4hGuKBEvAEgswhUDxAsAEodwxQjxAoDEIFwxRLwAIP4IV4wRLwCIL8IVB8QLAOKHcMUJ8QKA+CBccUS8ACD2CFecES8AiC3ClQDGJHsGAJA+CBcAwFYIV4LxtiEARIdwJRifeQFAdAhXEhAvABg6wpUkxAsAhoZwJRHxAoDIEa4kI14AEBnClQKIFwAMHuFKEcQLAAaHcKUQ4gUAZ0e4UgzxAoCBEa4URLwA4MwIV4oiXgDQP8KVwogXAJyOcKU44gUA4QiXDRAvAPga4bIJ4gUAJxEuGyFeAEC4bId4Ach0hMuGiBeATEa4bIp4AchUhMvGiBeATES4bI54Acg0hCsNEC8AmSTicL399tu64YYbVFRUpKysLL3wwgth+40xWrFihSZMmKARI0aorKxM+/fvDxtz+PBhLViwQE6nU/n5+Vq4cKGOHDkS1YFkOuIFIFNEHK6jR49qxowZevzxx/vdv3r1aj322GN68skntW3bNo0aNUrl5eU6duyYNWbBggXas2eP6uvrtWnTJr399tu68847h34UkES8AGQIEwVJZuPGjdbXvb29xuPxmIcfftja1tnZaRwOh3nuueeMMcbs3bvXSDI7duywxrzyyismKyvLfPrpp4N63kAgYCSZQCAQzfTTVnT/qwJA/EXzOh7Tz7gO
Hjwov9+vsrIya5vL5VJpaal8Pp8kyefzKT8/X7NmzbLGlJWVKTs7W9u2bev3cUOhkILBYNgNZ8aZF4B0FtNw+f1+SZLb7Q7b7na7rX1+v1+FhYVh+3NyclRQUGCNOVVdXZ1cLpd1mzhxYiynnZaIF4B0ZYurCmtraxUIBKxbW1tbsqdkC8QLQDqKabg8Ho8kqb29PWx7e3u7tc/j8aijoyNs/4kTJ3T48GFrzKkcDoecTmfYDYNDvACkm5iGa/LkyfJ4PGpoaLC2BYNBbdu2TV6vV5Lk9XrV2dmppqYma8yWLVvU29ur0tLSWE4H/0O8AKSTnEjvcOTIER04cMD6+uDBg9q1a5cKCgpUXFysZcuW6de//rWmTJmiyZMn6/7771dRUZFuuukmSdLFF1+s66+/XnfccYeefPJJHT9+XIsXL9YPf/hDFRUVxezAEK4vXsYkeyYAEJ2Iw/X+++/r//7v/6yva2pqJElVVVVat26dfvazn+no0aO688471dnZqauvvlqvvvqqhg8fbt3nmWee0eLFizVnzhxlZ2ersrJSjz32WAwOBwMhXgDSQZYx9nsZCwaDcrlcCgQCfN41BMQLQLJF8zpui6sKEVt85gXAzghXhiJeAOyKcGUw4gXAjghXhiNeAOyGcIF4AbAVwgVJxAuAfRAuWIgXADsgXAhDvACkOsKF0xAvAKmMcKFfxAtAqiJcOCPiBSAVES4MiHgBSDWEC2dFvACkEsKFQSFeAFIF4cKgES8AqYBwISLEC0CyES5EjHgBSCbChSEhXgCShXBhyIgXgGQgXIgK8QKQaIQLUSNeABKJcCEmiBeARCFciBniBSARCBdiingBiDfChZgjXgDiiXAhLogXgHghXIgb4gUgHggX4op4AYg1woW4I14AYolwISGIF4BYIVxIGOIFIBYIFxKKeAGIFuFCwhEvANEgXEgK4gVgqAgXkoZ4ARgKwoWkIl4AIkW4kHTEC0AkCBdSAvECMFiECymDeAEYDMKFlEK8AJwN4ULKIV4ABkK4kJKIF4AzIVxIWcQLQH8IF1Ia8QJwKsKFlEe8AHwT4YItEC8AfQgXbIN4AZAIF2yGeAEgXLAd4gVkNsIFWyJeQOYiXLAt4gVkJsIFWyNeQOYhXLA9Y5I9AwCJRLiQNjjzAjID4ULa4G1DIDMQLqQV4gWkP8KFtEO8gPRGuJCWiBeQvggX0hbxAtIT4UJaI15A+iFcSHvEC0gvEYWrrq5OV1xxhcaMGaPCwkLddNNNamlpCRtz7NgxVVdXa+zYsRo9erQqKyvV3t4eNqa1tVUVFRUaOXKkCgsLtXz5cp04cSL6owHOgHgB6SOicDU2Nqq6ulpbt25VfX29jh8/rrlz5+ro0aPWmLvvvlsvvfSSNmzYoMbGRn322WeaP3++tb+np0cVFRXq7u7We++9p6efflrr1q3TihUrYndUQD+IF5AmTBQ6OjqMJNPY2GiMMaazs9Pk5uaaDRs2WGP27dtnJBmfz2eMMWbz5s0mOzvb+P1+a8yaNWuM0+k0oVBoUM8bCASMJBMIBKKZPjJUdN/1AGIhmtfxqD7jCgQCkqSCggJJUlNTk44fP66ysjJrzNSpU1VcXCyfzydJ8vl8mjZtmtxutzWmvLxcwWBQe/bs6fd5QqGQgsFg2A0YKs68AHsbcrh6e3u1bNkyXXXVVbr00kslSX6/X3l5ecrPzw8b63a75ff7rTHfjFbf/r59/amrq5PL5bJuEydOHOq0AUnEC7CzIYerurpau3fv1vr162M5n37V1tYqEAhYt7a2trg/J9If8QLsKWcod1q8eLE2bdqkt99+W+eee6613ePxqLu7W52dnWFnXe3t7fJ4PNaY7du3hz1e31WHfWNO5XA45HA4hjJVYEB98eJPowD2EdEZlzFGixcv1saNG7VlyxZNnjw5bP/MmTOVm5urhoYGa1tLS4taW1vl9XolSV6vV83Nzero6LDG1NfXy+l0qqSkJJpjAYaE
My/AXiI646qurtazzz6rF198UWPGjLE+k3K5XBoxYoRcLpcWLlyompoaFRQUyOl0asmSJfJ6vZo9e7Ykae7cuSopKdGtt96q1atXy+/367777lN1dTVnVUgazrwA+8gyZvD/V806w3+Wrl27Vj/60Y8knfwB5HvuuUfPPfecQqGQysvL9cQTT4S9DfjJJ59o0aJFeuuttzRq1ChVVVVp1apVyskZXEeDwaBcLpcCgYCcTudgpw+cFfECEiOa1/GIwpUqCBfiiXgB8RfN6zi/qxA4BZ95AamNcAH9IF5A6iJcwBkQLyA1ES5gAMQLSD2ECzgL4gWkFsIFDALxAlIH4QIGiXgBqYFwAREgXkDyES4gQsQLSC7CBQwB8QKSh3ABQ0S8gOQgXEAUiBeQeIQLiBLxAhKLcAExQLyAxCFcQIwQLyAxCBcQQ8QLiD/CBcQY8QLii3ABcUC8gPghXECcEC8gPggXEEfEC4g9wgXEGfECYotwAQlAvIDYIVxAghAvIDYIF5BAxAuIHuECEox4AdEhXEASEC9g6AgXkCTECxgawgUkEfECIke4gCQjXkBkCBeQAogXMHiEC0gRxAsYHMIFpBDiBZwd4QJSDPECBka4gBREvIAzI1xAiiJeQP8IF5DCiBdwOsIFpDjiBYQjXIANEC/ga4QLsAniBZxEuAAbIV4A4QJsh3gh0xEuwIaIFzIZ4QJsinghUxEuwMaIFzIR4QJsjngh0xAuIA0QL2QSwgWkCeKFTEG4gDRCvJAJCBeQZogX0h3hAtIQ8UI6I1xAmiJeSFeEC0hjxAvpiHABaY54Id0QLiADEC+kE8IFZAjihXRBuIAMQryQDggXkGGIF+yOcAEZiHjBzggXkKGIF+yKcAEZjHjBjggXkOGIF+yGcAEgXrCViMK1Zs0aTZ8+XU6nU06nU16vV6+88oq1/9ixY6qurtbYsWM1evRoVVZWqr29PewxWltbVVFRoZEjR6qwsFDLly/XiRMnYnM0AIaMeMEuIgrXueeeq1WrVqmpqUnvv/++rrvuOt14443as2ePJOnuu+/WSy+9pA0bNqixsVGfffaZ5s+fb92/p6dHFRUV6u7u1nvvvaenn35a69at04oVK2J7VACGhHjBFkyUzjnnHPPUU0+Zzs5Ok5ubazZs2GDt27dvn5FkfD6fMcaYzZs3m+zsbOP3+60xa9asMU6n04RCoUE/ZyAQMJJMIBCIdvoA+hH9KwMwsGhex4f8GVdPT4/Wr1+vo0ePyuv1qqmpScePH1dZWZk1ZurUqSouLpbP55Mk+Xw+TZs2TW632xpTXl6uYDBonbX1JxQKKRgMht0AxA9nXkhlEYerublZo0ePlsPh0F133aWNGzeqpKREfr9feXl5ys/PDxvvdrvl9/slSX6/Pyxaffv79p1JXV2dXC6XdZs4cWKk0wYQIeKFVBVxuC666CLt2rVL27Zt06JFi1RVVaW9e/fGY26W2tpaBQIB69bW1hbX5wNwEvFCKsqJ9A55eXm64IILJEkzZ87Ujh079Oijj+rmm29Wd3e3Ojs7w8662tvb5fF4JEkej0fbt28Pe7y+qw77xvTH4XDI4XBEOlUAMdAXL2OSPRPgpKh/jqu3t1ehUEgzZ85Ubm6uGhoarH0tLS1qbW2V1+uVJHm9XjU3N6ujo8MaU19fL6fTqZKSkminAiBOOPNCKonojKu2tlbz5s1TcXGxurq69Oyzz+qtt97Sa6+9JpfLpYULF6qmpkYFBQVyOp1asmSJvF6vZs+eLUmaO3euSkpKdOutt2r16tXy+/267777VF1dzRkVkOI480KqiChcHR0duu2223To0CG5XC5Nnz5dr732mr773e9Kkh555BFlZ2ersrJSoVBI5eXleuKJJ6z7Dxs2TJs2bdKiRYvk9Xo1atQoVVVV6aGHHortUQGIC+KFVJBljP2+BYPBoFwulwKBgJxOZ7KnA2Qc4oVoRfM6zu8qBBAxPvNCMhEuAENCvJAs
hAvAkBEvJAPhAhAV4oVEI1wAoka8kEiEC0BMEC8kCuECEDPEC4lAuADEFPFCvBEuADHHDycjnggXgLjhzAvxQLgAxA1vGyIeCBeAuCJeiDXCBSDu+MwLsUS4ACQMZ16IBcIFIGF42xCxQLgAJBRvGyJahAsAYCuECwBgK4QLAGArhAsAYCuECwBgK4QLAGArhAsAYCuECwBgK4QLAGArhAsAYCuECwBgK4QLAGArhAsAYCuECwBgK4QLAGArhAsAYCuECwBgK4QLAGArhAsAYCuECwBgK4QLAGArhAsAYCuECwBgK4QLAGArhAsAYCuECwBgK4QLAGArhAsAYCuECwBgK4QLAGArhAsAYCuECwBgK4QLAGArhAsAYCuECwBgK4QLAGArhAsAJGVlJXsGGCzCBQCSjCFedkG4AOB/iJc9EC4A+AbilfoIFwCcgnilNsIFAP0gXqmLcAHAGRCv1ES4AGAAxCv1EC4AOAvilVqiCteqVauUlZWlZcuWWduOHTum6upqjR07VqNHj1ZlZaXa29vD7tfa2qqKigqNHDlShYWFWr58uU6cOBHNVAAgrohX6hhyuHbs2KE//vGPmj59etj2u+++Wy+99JI2bNigxsZGffbZZ5o/f761v6enRxUVFeru7tZ7772np59+WuvWrdOKFSuGfhQAkADEK0WYIejq6jJTpkwx9fX15tprrzVLly41xhjT2dlpcnNzzYYNG6yx+/btM5KMz+czxhizefNmk52dbfx+vzVmzZo1xul0mlAoNKjnDwQCRpIJBAJDmT4ARGVor5z4pmhex4d0xlVdXa2KigqVlZWFbW9qatLx48fDtk+dOlXFxcXy+XySJJ/Pp2nTpsntdltjysvLFQwGtWfPnn6fLxQKKRgMht0AIFk480qunEjvsH79en3wwQfasWPHafv8fr/y8vKUn58ftt3tdsvv91tjvhmtvv19+/pTV1enBx98MNKpAkDc9MXLmGTPJPNEdMbV1tampUuX6plnntHw4cPjNafT1NbWKhAIWLe2traEPTcAnAlnXskRUbiamprU0dGhyy+/XDk5OcrJyVFjY6Mee+wx5eTkyO12q7u7W52dnWH3a29vl8fjkSR5PJ7TrjLs+7pvzKkcDoecTmfYDQBSAfFKvIjCNWfOHDU3N2vXrl3WbdasWVqwYIH179zcXDU0NFj3aWlpUWtrq7xeryTJ6/WqublZHR0d1pj6+no5nU6VlJTE6LAAIHGIV2JF9BnXmDFjdOmll4ZtGzVqlMaOHWttX7hwoWpqalRQUCCn06klS5bI6/Vq9uzZkqS5c+eqpKREt956q1avXi2/36/77rtP1dXVcjgcMTosAEgsPvNKnIgvzjibRx55RNnZ2aqsrFQoFFJ5ebmeeOIJa/+wYcO0adMmLVq0SF6vV6NGjVJVVZUeeuihWE8FABKKeCVGljH2W+JgMCiXy6VAIMDnXQBSDvE6u2hex/ldhQAQY3zmFV+ECwDigHjFD+ECgDghXvFBuAAgjohX7BEuAIgz4hVbhAsAEoB4xQ7hAoAEIV6xQbgAIIGIV/QIFwAkGPGKDuECgCQgXkNHuAAgSYjX0BAuAEgi4hU5wgUASUa8IkO4ACAFEK/BI1wAkCKI1+AQLgBIIcTr7AgXAKQY4jUwwgUAKYh4nRnhAoAURbz6R7gAIIURr9MRLgBIccQrHOECABsgXl8jXABgE8TrJMIFADZCvAgXANhOpseLcAGADWVyvAgXANhUpsaLcAGAjWVivAgXANhcpsWLcAFAGsikeBEuAEgTmRIvwgUAaSQT4kW4ACDNpHu8CBcApKF0jhfhAoA0la7xIlwAkMbSMV6ECwDSXLrFi3ABQAZIp3gRLgDIEOkSL8IFABkkHeJFuAAgw9g9XoQLADKQneNFuAAgQ9k1XoQLADKYHeNFuAAgw9ktXoQLAGCreBEuAIAk+8SLcAEALMYkewZnR7gAALZCuAAA/UrVtw0JFwCgX6n6
tiHhAgDYCuECANgK4QIA2ArhAgAMWipcsEG4AACDlgoXbBAuAICtEC4AgK0QLgCArRAuAICtEC4AgK1EFK4HHnhAWVlZYbepU6da+48dO6bq6mqNHTtWo0ePVmVlpdrb28Meo7W1VRUVFRo5cqQKCwu1fPlynThxIjZHAwBIezmR3uGSSy7RG2+88fUD5Hz9EHfffbdefvllbdiwQS6XS4sXL9b8+fP17rvvSpJ6enpUUVEhj8ej9957T4cOHdJtt92m3Nxc/eY3v4nB4QAA0l3E4crJyZHH4zlteyAQ0J/+9Cc9++yzuu666yRJa9eu1cUXX6ytW7dq9uzZev3117V371698cYbcrvduuyyy/SrX/1KP//5z/XAAw8oLy8v+iMCAKS1iD/j2r9/v4qKinT++edrwYIFam1tlSQ1NTXp+PHjKisrs8ZOnTpVxcXF8vl8kiSfz6dp06bJ7XZbY8rLyxUMBrVnz54zPmcoFFIwGAy7AQAyU0ThKi0t1bp16/Tqq69qzZo1OnjwoK655hp1dXXJ7/crLy9P+fn5Yfdxu93y+/2SJL/fHxatvv19+86krq5OLpfLuk2cODGSaQMA0khEbxXOmzfP+vf06dNVWlqqSZMm6fnnn9eIESNiPrk+tbW1qqmpsb4OBoPECwAyVFSXw+fn5+vCCy/UgQMH5PF41N3drc7OzrAx7e3t1mdiHo/ntKsM+77u73OzPg6HQ06nM+wGAMhMUYXryJEj+vjjjzVhwgTNnDlTubm5amhosPa3tLSotbVVXq9XkuT1etXc3KyOjg5rTH19vZxOp0pKSqKZCgAgQ0T0VuFPf/pT3XDDDZo0aZI+++wzrVy5UsOGDdMtt9wil8ulhQsXqqamRgUFBXI6nVqyZIm8Xq9mz54tSZo7d65KSkp06623avXq1fL7/brvvvtUXV0th8MRlwMEAKSGrKzY/Hb5iML1n//8R7fccou+/PJLjR8/XldffbW2bt2q8ePHS5IeeeQRZWdnq7KyUqFQSOXl5XriiSes+w8bNkybNm3SokWL5PV6NWrUKFVVVemhhx6K/kgAACktVn8SJcuYVPjrKpEJBoNyuVwKBAJ83gUANhTN63jEP4CcCvpay89zAYA99b1+D+XcyZbh+vLLLyWJS+IBwOa6urrkcrkiuo8tw1VQUCDp5C/sjfSAM0Xfz7q1tbXxdmo/WJ+BsT4DY30GNpj1Mcaoq6tLRUVFET++LcOVnX3yKn6Xy8U3zVnwc28DY30GxvoMjPUZ2NnWZ6gnHvw9LgCArRAuAICt2DJcDodDK1eu5IeWB8AaDYz1GRjrMzDWZ2DxXh9b/hwXACBz2fKMCwCQuQgXAMBWCBcAwFYIFwDAVmwZrscff1znnXeehg8frtLSUm3fvj3ZU0qIt99+WzfccIOKioqUlZWlF154IWy/MUYrVqzQhAkTNGLECJWVlWn//v1hYw4fPqwFCxbI6XQqPz9fCxcu1JEjRxJ4FPFTV1enK664QmPGjFFhYaFuuukmtbS0hI05duyYqqurNXbsWI0ePVqVlZWn/XHT1tZWVVRUaOTIkSosLNTy5ct14sSJRB5KXKxZs0bTp0+3fijU6/XqlVdesfZn8tr0Z9WqVcrKytKyZcusbZm8Rg888ICysrLCblOnTrX2J3RtjM2sX7/e5OXlmT//+c9mz5495o477jD5+fmmvb092VOLu82bN5tf/vKX5m9/+5uRZDZu3Bi2f9WqVcblcpkXXnjB/POf/zTf//73zeTJk81XX31ljbn++uvNjBkzzNatW80//vEPc8EFF5hbbrklwUcSH+Xl5Wbt2rVm9+7dZteuXeZ73/ueKS4uNkeOHLHG3HXXXWbixImmoaHBvP/++2b27NnmO9/5jrX/xIkT5tJLLzVlZWVm586dZvPmzWbcuHGmtrY2GYcUU3//+9/Nyy+/bP71r3+ZlpYW84tf/MLk5uaa3bt3G2Mye21OtX37dnPeeeeZ6dOn
m6VLl1rbM3mNVq5caS655BJz6NAh6/b5559b+xO5NrYL15VXXmmqq6utr3t6ekxRUZGpq6tL4qwS79Rw9fb2Go/HYx5++GFrW2dnp3E4HOa5554zxhizd+9eI8ns2LHDGvPKK6+YrKws8+mnnyZs7onS0dFhJJnGxkZjzMn1yM3NNRs2bLDG7Nu3z0gyPp/PGHPyPw6ys7ON3++3xqxZs8Y4nU4TCoUSewAJcM4555innnqKtfmGrq4uM2XKFFNfX2+uvfZaK1yZvkYrV640M2bM6HdfotfGVm8Vdnd3q6mpSWVlZda27OxslZWVyefzJXFmyXfw4EH5/f6wtXG5XCotLbXWxufzKT8/X7NmzbLGlJWVKTs7W9u2bUv4nOMtEAhI+vqXMjc1Nen48eNhazR16lQVFxeHrdG0adPkdrutMeXl5QoGg9qzZ08CZx9fPT09Wr9+vY4ePSqv18vafEN1dbUqKirC1kLi+0eS9u/fr6KiIp1//vlasGCBWltbJSV+bWz1S3a/+OIL9fT0hB24JLndbn300UdJmlVq8Pv9ktTv2vTt8/v9KiwsDNufk5OjgoICa0y66O3t1bJly3TVVVfp0ksvlXTy+PPy8pSfnx829tQ16m8N+/bZXXNzs7xer44dO6bRo0dr48aNKikp0a5duzJ+bSRp/fr1+uCDD7Rjx47T9mX6909paanWrVuniy66SIcOHdKDDz6oa665Rrt370742tgqXMBgVVdXa/fu3XrnnXeSPZWUctFFF2nXrl0KBAL661//qqqqKjU2NiZ7Wimhra1NS5cuVX19vYYPH57s6aScefPmWf+ePn26SktLNWnSJD3//PMaMWJEQudiq7cKx40bp2HDhp12pUp7e7s8Hk+SZpUa+o5/oLXxeDzq6OgI23/ixAkdPnw4rdZv8eLF2rRpk958802de+651naPx6Pu7m51dnaGjT91jfpbw759dpeXl6cLLrhAM2fOVF1dnWbMmKFHH32UtdHJt7s6Ojp0+eWXKycnRzk5OWpsbNRjjz2mnJwcud3ujF+jb8rPz9eFF16oAwcOJPz7x1bhysvL08yZM9XQ0GBt6+3tVUNDg7xebxJnlnyTJ0+Wx+MJW5tgMKht27ZZa+P1etXZ2ammpiZrzJYtW9Tb26vS0tKEzznWjDFavHixNm7cqC1btmjy5Mlh+2fOnKnc3NywNWppaVFra2vYGjU3N4cFvr6+Xk6nUyUlJYk5kATq7e1VKBRibSTNmTNHzc3N2rVrl3WbNWuWFixYYP0709fom44cOaKPP/5YEyZMSPz3T8SXliTZ+vXrjcPhMOvWrTN79+41d955p8nPzw+7UiVddXV1mZ07d5qdO3caSea3v/2t2blzp/nkk0+MMScvh8/Pzzcvvvii+fDDD82NN97Y7+Xw3/72t822bdvMO++8Y6ZMmZI2l8MvWrTIuFwu89Zbb4Vdsvvf//7XGnPXXXeZ4uJis2XLFvP+++8br9drvF6vtb/vkt25c+eaXbt2mVdffdWMHz8+LS5nvvfee01jY6M5ePCg+fDDD829995rsrKyzOuvv26Myey1OZNvXlVoTGav0T333GPeeustc/DgQfPuu++asrIyM27cONPR0WGMSeza2C5cxhjz+9//3hQXF5u8vDxz5ZVXmq1btyZ7Sgnx5ptvGkmn3aqqqowxJy+Jv//++43b7TYOh8PMmTPHtLS0hD3Gl19+aW655RYzevRo43Q6ze233266urqScDSx19/aSDJr1661xnz11VfmJz/5iTnnnHPMyJEjzQ9+8ANz6NChsMf597//bebNm2dGjBhhxo0bZ+655x5z/PjxBB9N7P34xz82kyZNMnl5eWb8+PFmzpw5VrSMyey1OZNTw5XJa3TzzTebCRMmmLy8PPOtb33L3HzzzebAgQPW/kSuDX/WBABgK7b6jAsAAMIFALAVwgUAsBXCBQCwFcIFALAVwgUAsBXCBQCwFcIFALAVwgUA
sBXCBQCwFcIFALAVwgUAsJX/B5Z2LwlvW61fAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Heatmap of W^T @ W, where W is layer 0's `self_attn.v_up_proj` weight (moved to CPU).\n",
    "attn_block = model.model.layers[0].self_attn\n",
    "v_up_weight = attn_block.v_up_proj.weight.data.to(\"cpu\")\n",
    "gram = v_up_weight.T @ v_up_weight\n",
    "plt.imshow(gram, cmap=cmap, interpolation='none')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "transmla",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
